# coding=utf-8
__author__ = 'loubobooo'

# 导入jieba模块，用于中文分词
import jieba
# 导入matplotlib，用于生成2D图形
import matplotlib.pyplot as plt
# 导入wordcloud，用于制作词云图
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import json

# 获取所有评论
# Collect every genre name from the crawl log.
# Each line of collection.log is one JSON object; records may carry a
# 'genres' key holding a list of {'name': ...} dicts.
comments = []
with open('collection.log', mode='r', encoding='utf-8') as f:
    # Iterate the file lazily instead of materializing readlines().
    for row in f:
        record = json.loads(row.rstrip())
        # .get() returns None when 'genres' is absent; the original
        # len(None) call would crash on such records — guard with `or []`.
        for genre in record.get('genres') or []:
            comments.append(genre['name'])

# Segment the collected text with jieba (precise mode, cut_all=False).
# Join the raw strings with spaces first: the original str(comments)
# passed the Python list repr — brackets, quotes and commas — into the
# tokenizer, which leaked punctuation artifacts into the word cloud.
comment_after_split = jieba.cut(' '.join(comments), cut_all=False)
words = ' '.join(comment_after_split)  # space-joined tokens, as WordCloud expects
# print(words)

# Words to exclude from the cloud; start from wordcloud's built-in set.
# Add site-specific noise words here as needed, e.g. stopwords.add('平台').
stopwords = set(STOPWORDS)

# Load the mask image; the cloud will be drawn in this picture's shape.
bg_image = plt.imread('bg.png')


# Configure the cloud: scale controls render resolution, mask shapes the
# output to bg_image, and font_path must point to a CJK-capable font so
# Chinese characters are rendered instead of boxes.
wc = WordCloud(
    scale=4,
    background_color='white',
    mask=bg_image,
    font_path='/Library/Fonts/STHeiti Light.ttc',
    stopwords=stopwords,
    max_font_size=400,
    random_state=50,
)

# Feed the space-joined tokens into the cloud and display it
# (WordCloud objects can be passed straight to imshow).
wc.generate_from_text(words)
plt.imshow(wc)
plt.axis('off')  # hide the axes around the image
plt.show()

# Persist the rendered cloud next to the script.
wc.to_file('词云图.jpg')
